*Code used to compile data used in Corporate Taxes and Initial Public Offerings: Evidence from the Tax Cuts and Jobs Act
*Authors: Michelle Hutchens and Alexander Edwards
*Data Analysis completed by Michelle Hutchens

********************************************************************************
/*Merging Data Files*/

*Start with data from Securities Data Company (SDC) Platinum 
*intial data collected for IPOs completed by U.S. entities through December 2018
*appended data collected for IPOs completed by U.S. entities during 2019
*required stock be traded on a U.S. Exchange (exchange = NYSE Amex, NYSE MKT, NASDAQ, New York)

drop if tic==""
drop if tic=="NA"
drop if cik==""
drop if issue_date==.

sort issue_date cik
quietly by issue_date cik: gen dup = cond(_N==1,0,_n)
drop if dup>1

*Merged firm-year data from compustat north america
*Merged firm-month data from CRSP

*merged underwriter rank and age files from Jay Ritter
/*for IPOs where age details were missing from Ritter files, the founding year was manually pulled from Form S-1/S-1A from the SEC EDGAR system*/
replace Founding=1998 if uniqueid==1961
replace Founding=2003 if uniqueid==2999
replace Founding=1996 if uniqueid==1953
replace Founding=2004 if uniqueid==2082
replace Founding=1981 if uniqueid==1929
replace Founding=2006 if uniqueid==3393
replace Founding=2006 if uniqueid==2970
replace Founding=2002 if uniqueid==2788
replace Founding=1866 if uniqueid==2348
replace Founding=2006 if uniqueid==3589
replace Founding=2011 if uniqueid==3598
replace Founding=2011 if uniqueid==3597
replace Founding=2011 if uniqueid==3602
replace Founding=1992 if uniqueid==3480
replace Founding=1964 if uniqueid==1809
replace Founding=2005 if uniqueid==3605
replace Founding=1972 if uniqueid==2148
replace Founding=1995 if uniqueid==2112
replace Founding=1949 if uniqueid==1931
replace Founding=1997 if uniqueid==2814
replace Founding=2007 if uniqueid==3612
replace Founding=2006 if uniqueid==2200
replace Founding=1985 if uniqueid==2278
replace Founding=2005 if uniqueid==3413
replace Founding=2000 if uniqueid==3635
replace Founding=2010 if uniqueid==3391
replace Founding=2005 if uniqueid==2934
replace Founding=2006 if uniqueid==1679
replace Founding=2003 if uniqueid==3224
replace Founding=2002 if uniqueid==2826
replace Founding=1923 if uniqueid==1889
replace Founding=1966 if uniqueid==2245
replace Founding=2005 if uniqueid==1662
replace Founding=2009 if uniqueid==3653
replace Founding=2012 if uniqueid==3666
replace Founding=2005 if uniqueid==3656
replace Founding=2000 if uniqueid==2162
replace Founding=1995 if uniqueid==3240
replace Founding=2007 if uniqueid==1614
replace Founding=2010 if uniqueid==3512
replace Founding=1959 if uniqueid==2457
replace Founding=1999 if uniqueid==2411
replace Founding=1986 if uniqueid==2388
replace Founding=1990 if uniqueid==3034
replace Founding=2013 if uniqueid==2218
replace Founding=2015 if uniqueid==2858
replace Founding=1940 if uniqueid==2474
replace Founding=1987 if uniqueid==1730
replace Founding=2012 if uniqueid==3695

*Merge in annual CPI data

*Merged details on IPOs of firms that are taxed as "flow throughs"

*Run code for 12 Industry classification scheme

*Merge matched portfolios by year and industry
*matched portfolios were created using the following: 
*gen p2s=(prcc_f*csho)/sale
*sort year ffi12
*by year ffi12: egen at33=pctile(at), p(33)
*by year ffi12: egen at66=pctile(at), p(66)
*gen sizeport=0
*replace sizeport=1 if at<=at33
*replace sizeport=2 if at>at33 & at<=at66
*replace sizeport=3 if at>at66
*sort year ffi12 sizeport
*by year ffi12 sizeport: egen medianp2s=pctile(p2s), p(50)

*During revision process merged an industry-quarter measure of exposure to politial risk based on firm-quarter measure from Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun 

********************************************************************************
/*Sample Selection Procedures*/

drop if unit==1 /*unit equals 1 when the security type in SDC is noted as "Units"*/
drop if blankcheck==1 /*blankcheck equals 1 when the business decription in SDC is noted as "Blank Check Company"*/
drop if sic_code>5999 & sic_code<7000
drop if offerprice1>5
drop if scipo==1 /*flow throughs commonly refered to as supercharged IPOs*/

********************************************************************************
/*creating time variables*/

gen post=0
replace post=1 if issue_date>21175 

gen during_elect=0
replace during_elect=1 if sas_issuedate>20766 & sas_issuedate<=21175 

gen period=1 if post==1
replace period=2 if during_elect==1
replace period=3 if post==0 & during_elect==0

gen year=year(issue_date)
gen quarter=1
replace quarter=2 if month(issue_date)==4
replace quarter=2 if month(issue_date)==5
replace quarter=2 if month(issue_date)==6
replace quarter=3 if month(issue_date)==7
replace quarter=3 if month(issue_date)==8
replace quarter=3 if month(issue_date)==9
replace quarter=4 if month(issue_date)==10
replace quarter=4 if month(issue_date)==11
replace quarter=4 if month(issue_date)==12
egen quarteryear=concat(quarter year)

/*created dependent variables*/ 

gen salespershare=(sale1/csho1)
gen op2sales=offerprice1/salespershare
winsor2 op2sales offerprice1 salespershare

gen logop2sales2=log(op2sales_w)

gen med_op2sales=(medianp2s_port1) if at<=at33
replace med_op2sales=(medianp2s_port2) if at>at33 & at<=at66
replace med_op2sales=(medianp2s_port3) if at>at66
gen llrp2s=log(op2sales_w/med_op2sales)
gen logop=log(offerprice1_w)

/*Calculating Control Variables*/

gen age=year-Founding
gen logage=log(1+age)

gen BIGN=0
replace BIGN=1 if au==4
replace BIGN=1 if au==5
replace BIGN=1 if au==6
replace BIGN=1 if au==7
replace BIGN=. if au==.

gen mktret = vwretd_monthpre+vwretd_2monthpre

gen vc=0
replace vc=1 if vc_backed=="Yes" 

gen bvps=(at1-lt1)/csho1

gen indrank=0
replace indrank=1 if underwriterrank>=8

gen midpoint=highprice1-((highprice1-lowprice1)/2)
gen revision=-1*(midpoint-offerprice1)/midpoint
gen reviseup=0
replace reviseup=1 if offerprice1>highprice1
replace reviseup=0 if highprice1==.

winsor2 mktret bvps revision proceeds grossspread_perc1

gen proceeds1_w=log(proceeds_w/CPI)

quietly tabulate ffi12, generate(indy)

/*Calculating x-sectional variables*/
gen ltdtl=txdb
replace ltdtl=txndbl-txdbcl if ltdtl==.
replace ltdtl=txndb-txndba-txdbcl if ltdtl==.
gen inddta=0
replace inddta=1 if txndb>0
replace inddta=1 if ltdtl<0 
replace inddta=. if txndb==. & ltdtl==.
gen ind_dtl=1
replace ind_dtl=0 if inddta==1
replace ind_dtl=. if inddta=. & ltdtl==.

gen foreign=0
replace foreign=1 if txfo!=0 & txfo!=.
replace foreign=1 if pifo!=0 & pifo!=.
gen domestic=1
replace domestic=0 if foreign==1

gen lev=dltt/at
egen medlev=median(lev)
gen lowlev=0
replace lowlev=1 if lev<medlev
replace lowlev=. if lev==.

/*creating IPOTOT and IPORET */
gen month=month(issue_date)
gen year=year(issue_date)
sort year month
quietly by year month: gen count = _N

gen percchange_1day=(price_1dayafter-offerprice1)/offerprice1*100
by year month: egen avgrtn=mean(percchange_1day)

sort year month
gen lag_count = count[_n-1] if month==month[_n-1]+1
bysort month (lag_count) : replace lag_count = lag_count[_n-1] if missing(lag_count) 
gen lag_count2 = lag_count[_n-2] if month==month[_n-1]+1
bysort month (lag_count2) : replace lag_count2 = lag_count2[_n-1] if missing(lag_count2) 
gen ipotot=lag_count+lag_count2

sort year month
gen uprtn_lag1 = avgrtn[_n-1] if month==month[_n-1]+1
bysort month (uprtn_lag1) : replace uprtn_lag1 = uprtn_lag1[_n-1] if missing(uprtn_lag1) 
gen uprtn_lag2 = uprtn_lag1[_n-2] if month==month[_n-1]+1
bysort month (uprtn_lag2) : replace uprtn_lag2 = uprtn_lag2[_n-1] if missing(uprtn_lag2) 
gen iporet=(uprtn_lag1+uprtn_lag2)/2




